
import pandas as pd
import re
# CSV file that is read, augmented with the extracted columns, and then
# overwritten in place.
path = r'C:\Users\Luther\Desktop\output\edu.csv'

# An integer or decimal number, optionally followed by a percent sign.
# The fractional part is an optional group so that single-digit values such
# as "5" match; the earlier form `\d+\.?\d+` needed at least two digits and
# silently skipped them (or picked up a later, unrelated number instead).
num = re.compile(r'\d+(?:\.\d+)?%?')

# Output column -> keywords to look for, in priority order. The first keyword
# that occurs anywhere in the text wins, even if a later keyword appears
# earlier in the text. Dict order also fixes the order of the new columns.
KEYWORDS_BY_COLUMN = {
    'num_tech': ("技术", "研发"),    # technical / R&D
    'num_prod': ("生产",),           # production
    'num_phd': ("博士",),            # doctorate
    'num_ma': ("硕士", "研究生"),    # master's / graduate student
    'num_bac': ("本科",),            # bachelor's
}


def first_number_after(text, keywords, pattern=num):
    """Return the first number that follows the first matching keyword.

    Args:
        text: Cell value to search. Anything that is not a ``str`` (for
            example the NaN pandas uses for an empty cell) yields ``None``.
        keywords: Keywords tried in priority order; the search starts at the
            position of the first keyword that is present in ``text``.
        pattern: Compiled regular expression describing a number.

    Returns:
        The matched number as a string (including a trailing ``%`` when
        present), or ``None`` when no keyword is found or no number follows it.
    """
    if not isinstance(text, str):
        return None
    for keyword in keywords:
        start = text.find(keyword)
        if start != -1:
            # Search from the keyword onwards without copying the tail.
            match = pattern.search(text, start)
            return match.group(0) if match else None
    return None


def add_number_columns(df):
    """Add one extracted-number column per entry of ``KEYWORDS_BY_COLUMN``.

    Reads the ``content`` column of ``df`` and writes the new columns into
    ``df`` in place. Cells without a match hold ``None``.

    Args:
        df: DataFrame with a ``content`` column of text.

    Returns:
        The same DataFrame, for convenience.
    """
    for column, keywords in KEYWORDS_BY_COLUMN.items():
        values = [first_number_after(text, keywords) for text in df['content']]
        # Explicit object dtype keeps the strings and None values as they are.
        df[column] = pd.Series(values, index=df.index, dtype=object)
    return df


def main():
    """Read the CSV at ``path``, add the number columns, and write it back."""
    df = pd.read_csv(path, encoding="utf-8")
    add_number_columns(df)
    df.to_csv(path, index=False)


if __name__ == "__main__":
    main()